https://fred.stlouisfed.org/series/DTWEXAFEGS
https://fred.stlouisfed.org/series/DTWEXEMEGS
https://fred.stlouisfed.org/series/DEXCHUS
https://www.kaggle.com/cristiangarrido/covid19geographicdistributionworldwide?select=Countries_ISO.csv
https://www.kaggle.com/fireballbyedimyrnmom/us-counties-covid-19-dataset
https://www.kaggle.com/aayushmishra1512/faang-complete-stock-data
import pandas as pd
import matplotlib.pyplot as plt
import datetime as dt
import seaborn as sns
from numpy.polynomial.polynomial import polyfit
import matplotlib.gridspec as gridspec
import numpy as np
import matplotlib as mpl
mpl.rcParams.update(mpl.rcParamsDefault)
import math
import os
import requests, io
import zipfile as zf
import shutil
import statsmodels.formula.api as smf
import matplotlib.ticker as mtick
from sklearn.neighbors import KNeighborsRegressor as knn
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LinearRegression as linreg
from scipy.stats import kde
import matplotlib.dates as mdates
from sklearn.preprocessing import MinMaxScaler
import plotly.graph_objects as go
%matplotlib inline
# ---- Raw data loads --------------------------------------------------------
# ECDC worldwide COVID-19 daily reports (Kaggle mirror); parse report dates.
path_world_covid = 'COVID-19-geographic-disbtribution-worldwide.csv'
world_covid = pd.read_csv(path_world_covid)
world_covid = world_covid.assign(dateRep=pd.to_datetime(world_covid['dateRep']))
world_covid  # notebook preview
# Sanity check: distinct geo codes vs. distinct country names.
world_covid['geoId'].nunique()
world_covid['countriesAndTerritories'].nunique()
#drop the countries that aren't in world COVID
# Country coordinates keyed by country code.
path_country_coordinate = 'countries.csv'
country_coord = pd.read_csv(path_country_coordinate)
# U.S. county-level case/death counts; parse the date column.
path_us_counties = 'us-counties.csv'
us_counties = pd.read_csv(path_us_counties)
us_counties = us_counties.assign(date=pd.to_datetime(us_counties['date']))
def _load_2020(path, date_col='Date', tail=None):
    """Read a CSV, optionally keep only the trailing `tail` rows, parse
    `date_col` as datetimes, and restrict to calendar-year 2020.

    Factors out the eight copy-pasted load sections; the explicit .copy()
    avoids SettingWithCopyWarning when assigning on the .tail() slice.
    """
    df = pd.read_csv(path)
    if tail is not None:
        df = df.tail(tail).copy()
    df[date_col] = pd.to_datetime(df[date_col])
    return df[df[date_col].dt.year == 2020]

# FAANG daily price histories (Kaggle); the last 365 rows cover 2020.
path_netflix = 'Netflix.csv'
netflix = _load_2020(path_netflix, tail=365)
path_amazon = 'Amazon.csv'
amazon = _load_2020(path_amazon, tail=365)
path_google = 'Google.csv'
google = _load_2020(path_google, tail=365)
path_apple = 'Apple.csv'
apple = _load_2020(path_apple, tail=365)
path_facebook = 'Facebook.csv'
facebook = _load_2020(path_facebook, tail=365)

# FRED series: trade-weighted USD vs. advanced foreign economies (AFE),
# vs. emerging markets (EM), and the USD/RMB spot rate ('DATE' column).
path_usd_adv_econ = 'DTWEXAFEGS.csv'
usd_adv_econ = _load_2020(path_usd_adv_econ, date_col='DATE')
path_usd_em_econ = 'DTWEXEMEGS.csv'
usd_em_econ = _load_2020(path_usd_em_econ, date_col='DATE')
path_usd_rmb = 'DEXCHUS.csv'
usd_rmb = _load_2020(path_usd_rmb, date_col='DATE')
# merge stocks: keep only Date + Close per ticker, rename Close so each
# column is distinguishable, then left-join everything onto Facebook's dates.
_price_cols = ['Open', 'High', 'Low', 'Adj Close', 'Volume']

def _close_only(df, label):
    """Return just Date and Close, with Close renamed to `label`."""
    return df.drop(columns=_price_cols).rename(columns={'Close': label})

netflix2 = _close_only(netflix, 'Close_Netflix')
google2 = _close_only(google, 'Close_Google')
amazon2 = _close_only(amazon, 'Close_Amazon')
apple2 = _close_only(apple, 'Close_Apple')
facebook2 = _close_only(facebook, 'Close_FB')

faangs = facebook2
for piece in (apple2, amazon2, google2, netflix2):
    faangs = faangs.merge(piece, on='Date', how='left')
faangs['Date'] = faangs['Date'].dt.strftime('%Y-%m-%d')
# Drop the trailing rows by index label (presumably the August dates — the
# later analysis only covers April–July).
faangs.drop(list(range(147, 159)), inplace=True)
#faangs
# Character 7 of 'YYYY-MM-DD' is the second month digit, so April–July map
# to '4'–'7' (this trick is only unambiguous for months 01–09).
faangs['Month'] = faangs.Date.str[6:7]
def _monthly_growth(df, month):
    """Slice one month of FAANG closes, sum the five closing prices into a
    Day_Close column, and return (monthly frame, daily %-change pivot).

    `month` is the single-character month key stored in df['Month'].
    The .copy() avoids SettingWithCopyWarning on the sliced frame.
    """
    sub = df.loc[df['Month'] == month].copy()
    sub['Day_Close'] = (sub['Close_FB'] + sub['Close_Apple'] +
                        sub['Close_Amazon'] + sub['Close_Google'] +
                        sub['Close_Netflix'])
    sub['Date'] = pd.to_datetime(sub['Date'])
    piv = sub.pivot_table(index='Date', columns='Month',
                          values='Day_Close', aggfunc='sum')
    # Day-over-day growth rate of the aggregate close (first row is NaN).
    return sub, piv.pct_change()

# One (frame, pivot) pair per month — replaces four copy-pasted sections.
faangs_april, f4_piv = _monthly_growth(faangs, '4')
f4_piv  # notebook preview
faangs_may, f5_piv = _monthly_growth(faangs, '5')
#f5_piv
faangs_june, f6_piv = _monthly_growth(faangs, '6')
#f6_piv
faangs_july, f7_piv = _monthly_growth(faangs, '7')
#f7_piv
f6_piv.index  # notebook spot-check: June trading-day DatetimeIndex
# Key the coordinate table on the same code column the ECDC data uses.
country_coord = country_coord.rename(columns={"country": "geoId"})
#country_coord
# Inner-style join: keep only rows that matched a coordinate record.
located = world_covid.merge(country_coord, on='geoId', how='left', indicator=True)
located = located.loc[located['_merge'] == 'both']
located = located.drop(columns=['day', 'year',
'popData2019', 'name', 'Cumulative_number_for_14_days_of_COVID-19_cases_per_100000'])
located = located.rename(columns={"dateRep": "Date", "continentExp": "Continent",
"countriesAndTerritories": "Country", "latitude": "Latitude",
"longitude": "Longitude", "cases": "New Cases",
'deaths': "Deaths"})
# Keep only April–August; months are 1–12, so matching a digit in
# {4,5,6,7,8} selects exactly those months.
located['month'] = located['month'].astype(str)
summer_mask = located['month'].str.contains('4|5|6|7|8')
located = located.loc[summer_mask, :]
group_keys = ['month', 'Date', 'Country',
              'Continent', 'Latitude', 'Longitude', 'countryterritoryCode']
covid_location = located.groupby(group_keys, as_index=False)[['New Cases', 'Deaths']].sum()
covid_location.loc[covid_location['Country'] == 'Afghanistan', :]  # spot-check
covid_location['month'] = covid_location['month'].astype(int)
# Grouping keys for per-country monthly totals.
_GEO_KEYS = ['Country', 'Latitude', 'Longitude', 'Continent', 'month',
             'countryterritoryCode']

def _monthly_totals(df, month):
    """Total New Cases / Deaths per country for one calendar month."""
    sel = df.loc[df['month'] == month]
    return sel.groupby(_GEO_KEYS, as_index=False)[['New Cases', 'Deaths']].sum()

# Replaces four copy-pasted filter+groupby sections.
covid_april = _monthly_totals(covid_location, 4)
covid_may = _monthly_totals(covid_location, 5)
covid_june = _monthly_totals(covid_location, 6)
covid_july = _monthly_totals(covid_location, 7)
covid_july  # notebook preview
world_covid = world_covid.rename(columns={"dateRep": "Date"})
# 4x2 grid: each row pairs a world bubble map of that month's COVID-19
# cases/deaths (left) with the FAANG daily growth-rate line chart (right).
cmap = mpl.cm.RdYlGn
reversed_cmap = cmap.reversed()  # reversed so red = high deaths, green = low
# NOTE(review): Normalize() with no vmin/vmax autoscales on each call, so
# bubble colors are scaled per-month and not directly comparable across the
# four maps — confirm whether a fixed vmin/vmax was intended.
n = mpl.colors.Normalize()
plt.style.use('dark_background')
fig, ax = plt.subplots(ncols = 2,nrows = 4)
plt.subplots_adjust(bottom = .7)
#APRIL
# Bubble size ~ new cases (scaled by .01); color ~ deaths (scaled by 2500
# before normalization — presumably to spread the color range; verify).
covid_april.plot.scatter(ax = ax[0,0],figsize=(20,30), y='Latitude',x='Longitude',s=covid_april['New Cases'] * .01,
color=reversed_cmap(n(covid_april['Deaths'].values * 2500)),
edgecolors = 'white',alpha = .9)
ax[0,0].set_title("Global COVID-19 (April)", fontsize = 16, fontweight = 'bold')
f4_piv.plot.line(ax = ax[0,1])
ax[0,1].axhline(0, color = 'white')  # zero-growth reference
april_mean = f4_piv['4'].mean()
ax[0,1].axhline(april_mean, color = 'red',linestyle = 'dashed')
# Legend drawn only on this first panel; later panels remove theirs.
ax[0,1].legend(bbox_to_anchor = (1, 1), labels = ['Growth Rate','Zero Growth','Mean Rate'])
ax[0,1].set_title("FAANG Growth Rate (April)", fontsize = 16, fontweight = 'bold')
ax[0,1].set_ylim(-.06, .06)  # common y-range across all four months
ax[0,1].set_xlabel(xlabel = '')
ax[0,1].yaxis.set_major_formatter(mtick.PercentFormatter(1.0))
#MAY
covid_may.plot.scatter(ax = ax[1,0], y='Latitude',x='Longitude',s=covid_may['New Cases'] * .01,
color=reversed_cmap(n(covid_may['Deaths'].values * 2500)),
edgecolors = 'white',alpha = .9)
ax[1,0].set_title("Global COVID-19 (May)", fontsize = 16, fontweight = 'bold')
ax[1,0].set_xlabel(xlabel = '')
f5_piv.plot.line(ax = ax[1,1])
ax[1,1].axhline(0, color = 'white')
may_mean = f5_piv['5'].mean()
ax[1,1].axhline(may_mean, color = 'red',linestyle = 'dashed')
ax[1,1].legend().remove()
ax[1,1].set_title("FAANG Growth Rate (May)", fontsize = 16, fontweight = 'bold')
ax[1,1].set_ylim(-.06, .06)
ax[1,1].set_xlabel(xlabel = '')
ax[1,1].yaxis.set_major_formatter(mtick.PercentFormatter(1.0))
#JUNE
covid_june.plot.scatter(ax = ax[2,0], y='Latitude',x='Longitude',s=covid_june['New Cases'] * .01,
color=reversed_cmap(n(covid_june['Deaths'].values * 2500)),
edgecolors = 'white',alpha = .9)
ax[2,0].set_title("Global COVID-19 (June)", fontsize = 16, fontweight = 'bold')
ax[2,0].set_xlabel(xlabel = '')
f6_piv.plot.line(ax = ax[2,1])
ax[2,1].set_title("FAANG Growth Rate (June)", fontsize = 16, fontweight = 'bold')
ax[2,1].axhline(0, color = 'white')
june_mean = f6_piv['6'].mean()
ax[2,1].axhline(june_mean, color = 'red',linestyle = 'dashed')
ax[2,1].legend().remove()
ax[2,1].set_ylim(-.06, .06)
ax[2,1].set_xlabel(xlabel = '')
ax[2,1].yaxis.set_major_formatter(mtick.PercentFormatter(1.0))
#JULY
covid_july.plot.scatter(ax = ax[3,0], y='Latitude',x='Longitude',s=covid_july['New Cases'] * .01,
color=reversed_cmap(n(covid_july['Deaths'].values * 2500)),
edgecolors = 'white',alpha = .9)
ax[3,0].set_title("Global COVID-19 (July)", fontsize = 16, fontweight = 'bold')
f7_piv.plot.line(ax = ax[3,1])
ax[3,1].axhline(0, color = 'white')
july_mean = f7_piv['7'].mean()
ax[3,1].axhline(july_mean, color = 'red',linestyle = 'dashed')
ax[3,1].legend().remove()
ax[3,1].set_title("FAANG Growth Rate (July)", fontsize = 16, fontweight = 'bold')
ax[3,1].set_ylim(-.06, .06)
ax[3,1].yaxis.set_major_formatter(mtick.PercentFormatter(1.0))
plt.savefig('Covid_stocks.png')
def _cases_choropleth(df, title):
    """World choropleth of one month's new COVID-19 cases.

    `df` needs 'countryterritoryCode' (ISO-3), 'New Cases', and 'Country'
    columns. Reversed RdYlGn scale: green = few cases, red = many.
    Replaces four copy-pasted figure definitions.
    """
    fig = go.Figure(data=go.Choropleth(
        locations=df['countryterritoryCode'],
        z=df['New Cases'],
        text=df['Country'],  # hover text
        colorscale='RdYlGn',
        autocolorscale=False,
        reversescale=True,
        marker_line_color='darkgray',
        marker_line_width=0.5,
        colorbar_title='New Cases',
    ))
    fig.update_layout(
        title_text=title,
        geo=dict(
            showframe=False,
            showcoastlines=True,
            projection_type='equirectangular',
        ),
        annotations=[dict(
            x=0.55,
            y=0.1,
            xref='paper',
            yref='paper',
            text='Source: Kaggle',
            showarrow=False,
        )],
    )
    return fig

fig_a = _cases_choropleth(covid_april, 'April COVID Cases')
fig_m = _cases_choropleth(covid_may, 'May COVID Cases')
fig_june = _cases_choropleth(covid_june, 'June COVID Cases')
fig_july = _cases_choropleth(covid_july, 'July COVID Cases')
The column one scatter plots visualize the relationship between the number of new COVID-19 cases (size of bubble) and the number of COVID-19 related deaths (color of bubble) per country from April to July. Smaller and green bubbles are indicative of fewer new cases and deaths, whereas larger and red bubbles are the opposite. The general trends by continent are as follows:
-In North America the United States experiences a decrease in monthly deaths and cases from April to June. However, in July, the resurgence of new cases in America reaches new heights. Canada sees a general trend of decreasing cases with consistently low fatalities. In Mexico the graphs indicate an increase in both cases and deaths.
-In South America, Brazil is the most serious case, with increasing new cases and deaths. Brazil’s situation can be considered a result of its lack of lockdown measures.
-Europe undergoes a decrease in both new cases and deaths during the summer months. Likely a result of strict lockdown measures utilized during the first wave of COVID. Russia is an exception and experiences higher levels of new cases.
-Africa does see an increase in cases, especially in South Africa. However, there does not appear to be a substantial increase in COVID-19 related deaths.
-Both Australia and New Zealand retain both low case growth and fatalities.
-In Asia there is a contrast between East Asia and India/Middle East. East Asian countries have near miniscule new cases and deaths, with exception to Japan, which experienced case growth in July. However, the Middle East and India are the opposite, experiencing both an increase in new cases and deaths throughout the summer. India is the most notable as it appears to have the largest case and death increase in Asia. There is also an increase in new cases in South East Asia countries: Indonesia and the Philippines.
Column two line charts depict the aggregated growth rate of FAANG stocks in the aforementioned time period. The dashed, red line represents the mean growth rate for that month. In April the FAANG growth rate was larger — slightly higher than 1% — however from May to July the rate was depressed, hovering near zero growth (~0.3%).
What is surprising is the FAANG growth rate was higher when the United States’ death count was its worst and started to decrease as deaths decreased, but cases rose. Potentially due to stock speculation.
Given that East Asian and European cases are low/decreasing there does not seem to be much of a visual relationship between these continents and the FAANG growth rate. In fact when the new cases were at the highest level in the United States in July, the FAANG growth rate had slightly increased from .32% in June to .35% in July.
Monthly FAANG growth rates are below:
April FAANG Rate: 1.18%
May FAANG Rate: 0.37%
June FAANG Rate: 0.32%
July FAANG Rate: 0.35%
# Commented-out summary prints for the monthly means quoted in the narrative.
#print("April FAANG Rate: {:.2%}".format(april_mean))
#print("May FAANG Rate: {:.2%}".format(may_mean))
#print("June FAANG Rate: {:.2%}".format(june_mean))
#print("July FAANG Rate: {:.2%}".format(july_mean))
#us_counties
#usd_em_econ
# Restrict both dollar indices to dates on/after 2020-01-21 (presumably the
# first U.S. COVID case date — confirm). The string comparison works because
# the dates stringify to ISO YYYY-MM-DD form.
usd_afe = usd_adv_econ[usd_adv_econ['DATE'].dt.date.astype(str) >= '2020-01-21']
#usd_afe
usd_em = usd_em_econ[usd_em_econ['DATE'].dt.date.astype(str) >= '2020-01-21']
#usd_em
# U.S. county data up to 2020-11-06, aggregated by calendar month below.
us_covid = us_counties[us_counties['date'].dt.date.astype(str) <= '2020-11-06']
us_covid = us_covid.drop(columns = ['fips'], axis = 1)
us_covid = us_covid.set_index(['date'])
# NOTE(review): 'date' is the index here, so the "date" -> "Date" mapping is
# a no-op; only the column renames take effect.
us_covid = us_covid.rename(columns={"date":"Date","state":"State","cases": "Cases","deaths":"Deaths"})
#us_covid = us_covid.iloc[::,:]
#us_covid = us_covid.groupby(['Date','State'])['Cases','Deaths'].sum()
# Sum by month number of the date index. NOTE(review): if Cases/Deaths are
# cumulative snapshots (as in the NYT county data), this sums cumulative
# values within each month rather than counting new events — confirm intent.
us_covid = us_covid.groupby([(us_covid.index.month)]).sum()
#piv = us_covid.pivot_table(index = 'date',columns = 'State', values = 'Cases', aggfunc = 'sum')
#piv.pct_change()
#us_covid
#usd_afe
usd_em
# Merge EM and AFE indices on date; coerce both series to numeric so any
# non-numeric placeholder values become NaN.
usd = usd_em.merge(usd_afe, on = 'DATE', how = 'left')
usd['DTWEXAFEGS'] = pd.to_numeric(usd['DTWEXAFEGS'],errors = 'coerce')
usd['DTWEXEMEGS'] = pd.to_numeric(usd['DTWEXEMEGS'],errors = 'coerce')
numeric = usd.copy()
usd.dtypes
# 5-day moving averages of the EM (positional col 1) and AFE (col 2) indices.
numeric['EM_Rolling'] = numeric.iloc[:,1].rolling(window=5).mean()
numeric['AFE_Rolling'] = numeric.iloc[:,2].rolling(window=5).mean()
#numeric
# Daily percent-change versions of both indices.
usd_pct = usd.copy()
usd_pct['DTWEXEMEGS'] = pd.to_numeric(usd_pct['DTWEXEMEGS'],errors = 'coerce').pct_change()
usd_pct['DTWEXAFEGS'] = pd.to_numeric(usd_pct['DTWEXAFEGS'],errors = 'coerce').pct_change()
# Figure 2: left column = monthly U.S. cases/deaths on a log scale; right
# column = dollar index levels with moving averages (top) and daily % changes
# (bottom), laid out with a 2x2 gridspec.
plt.style.use('dark_background')
fig2 = plt.figure(constrained_layout = True, figsize = (15,10))
gs = fig2.add_gridspec(2,2)
covid = fig2.add_subplot(gs[:,0])  # spans both rows of the left column
us_covid[['Cases','Deaths']].plot(ax = covid, color = ['cyan','red'])
covid.set_yscale('log')
covid.set_title('Monthly U.S. COVID-19 Cases & Deaths', fontsize = 16, fontweight = 'bold', fontstyle = 'oblique')
covid.set_xlabel('Month', fontsize = 14)
covid.set_ylabel('Cases & Deaths (log-scale)', fontsize = 14)
# NOTE(review): hard-coded tick labels assume the default ticks land on every
# other month starting at February — fragile if the data range changes.
covid.set_xticklabels(['','February', 'April','June','August','October'], fontsize = 10, rotation = 'horizontal')
covid.grid(color = 'white', linestyle = '-.', linewidth = 1, axis = 'y')
### top right
num = fig2.add_subplot(gs[0,1:])
# Index levels plus their 5-day moving averages, all on one axis.
numeric.plot(ax = num, color = 'tab:green', x = 'DATE', y ='DTWEXEMEGS')
numeric.plot(ax = num, color = 'cornflowerblue', x = 'DATE', y = 'DTWEXAFEGS')
numeric.plot(ax = num, color = 'red', x='DATE', y= 'EM_Rolling')
numeric.plot(ax = num, color = 'gold', x='DATE', y= 'AFE_Rolling')
# Triple-quoted labels embed newlines so the long legend entries wrap.
num.legend(title = 'Regions', labels = ['Emerging Markets', """Advanced Foreign
Economies""", """EM 5-Day Moving
Average""", """AFE 5-Day Moving
Average"""], loc='upper right',bbox_to_anchor=(1.36, 1.02))
num.set_title('USD Trade Weighted Indices', fontsize = 16, fontweight = 'bold', fontstyle = 'oblique')
num.set_xlabel('Month', fontsize = 14)
num.grid(color = 'white', linestyle = '-.', linewidth = .5, axis = 'y')
num.set_ylabel('Index Jan 2006 = 100', fontsize = 10)
### bottom right
pct = fig2.add_subplot(gs[1:,1:])
usd_pct.plot(ax = pct, color = 'tab:green',alpha = .8, x = 'DATE', y ='DTWEXEMEGS')
usd_pct.plot(ax = pct, color = 'cornflowerblue', alpha = .8, x = 'DATE', y = 'DTWEXAFEGS')
pct.set_xlabel('Month', fontsize = 14)
pct.axhline(0, color = 'red')  # zero-change reference line
pct.set_title('USD Trade Weighted Indices Daily % Change', fontsize = 16, fontweight = 'bold', fontstyle = 'oblique')
pct.yaxis.set_major_formatter(mtick.PercentFormatter(1.0))
pct.legend(title = 'Regions', labels = ['Emerging Markets', """Advanced Foreign
Economies"""],loc='upper right',bbox_to_anchor=(1.36, 1.02))
pct.grid(color = 'white', linestyle = '-.', linewidth = .5, axis = 'y')
# National daily death series: sum the state-level counts per date.
us_death_level = pd.read_csv('daily_death_us.csv')
us_death_level['datetime'] = pd.to_datetime(us_death_level.datetime)
us_death_level = pd.DataFrame(us_death_level.groupby('datetime')['daily_death'].sum())
us_death_level = us_death_level.reset_index('datetime')
#us_death_level
# Align daily deaths with the USD daily % changes on matching dates.
combine = pd.merge(us_death_level, usd_pct, left_on='datetime', right_on="DATE")
combine = combine[['datetime', 'daily_death', 'DTWEXEMEGS', 'DTWEXAFEGS']]
combine = combine.set_index('datetime')
combine
## Correlation matrix
# (Removed an unused 10x10 random DataFrame that was never referenced.)
corr = combine.corr()
corr.style.background_gradient(cmap='coolwarm')
The first graph depicts monthly U.S. COVID cases and deaths from 1/21 - 11/6 on the logarithmic scale. It is visible that from February to March cases and deaths more than doubled, experiencing the greatest growth of any month. From March to April the growth slows however both cases and deaths still nearly double. In the months after the curve begins to flatten and in October begins to decrease.
The graph in the top right corner shows a Trade Weighted USD compared to Advanced Foreign Economies (AFE) and Emerging Markets (EM) for goods & services. There is an initial appreciation of the dollar against both regions before depreciation beginning in late March. This depreciation trend continues through the end of the graphed time period. The dollar is weaker in November than in March.
The lower right corner graph visualizes daily growth rates in both Trade Weighted USD vs. AFE and EM for goods & services. Heading into March volatility begins to increase and growth rates will break +2% and nearly break -2%. The USD had both substantial increases and decreases relative to both regions, appreciating 2% against EMs and depreciating nearly -2% to AFEs. From the end of March onwards volatility decreased, and with exception to a few days, remained between -1% and 1%.
What’s interesting is that in March, when the U.S. COVID cases and deaths growth substantially increases there is both an increase in volatility and an appreciation of the USD. However, when COVID cases begin to flatten circa beginning of April both Trade Weighted Indices also flatten until late-May, early-June before a depreciation trend. Volatility also decreases from the beginning of April, remaining in the -1% to 1% bounds throughout the rest of the time period. However, after April, as U.S. case and death curves continue to flatten, the depreciation trend continues. Thus, the depreciation of the USD is likely not solely due to COVID but relates to other factors in the economy at large.
However, during the period the USD actually appreciated to EMs by 3.1615 from 122.1471 to 125.3086 while the USD depreciated to AFEs by 4.6691 from 109.7980 to 105.1289.
usd_rmb.dtypes  # notebook check: DEXCHUS arrives as a non-numeric dtype
# Coerce the exchange rate to numeric (unparseable entries become NaN).
usd_rmb['DEXCHUS'] = pd.to_numeric(usd_rmb['DEXCHUS'], errors='coerce')
# Restrict to the 2020-01-21 .. 2020-07-31 analysis window via a date index.
usd_rmb = (usd_rmb.rename(columns={"DATE": "Date"})
                  .set_index('Date')
                  .loc['2020-01-21':'2020-07-31']
                  .reset_index())
# Daily growth rate of the USD/RMB rate; drop the leading NaN row.
usd_rmb['RMB_GR'] = usd_rmb['DEXCHUS'].pct_change()
usd_rmb = usd_rmb.iloc[1:]
#faangs = faangs.drop(columns = ['total', 'Month'], axis = 1)
# Aggregate FAANG closes into one daily total.
# NOTE: faangs1 aliases faangs here (no copy), so the 'Total' column is also
# added to faangs — preserved original behavior.
faangs1 = faangs
faangs1['Total'] = (faangs['Close_FB'] + faangs['Close_Apple'] + faangs['Close_Amazon'] +
faangs['Close_Google'] + faangs['Close_Netflix'])
faangs1['Date'] = pd.to_datetime(faangs1['Date'])
faangs1 = faangs1.set_index('Date')
faangs1 = faangs1.loc['2020-01-21':'2020-07-31']
faangs1 = faangs1.reset_index()
faangs1 = faangs1.groupby('Date', as_index = False)['Total'].sum()
faangs1['Faang_GR'] = faangs1['Total'].pct_change()
faangs1 = faangs1.iloc[1:]  # first pct_change row is NaN
#faangs1
# National daily case totals and growth rate over the same window.
us_covid2 = us_counties.rename(columns={"date":"Date","state":"State","cases": "Cases","deaths":"Deaths"})
us_covid2 = us_covid2.set_index('Date')
us_covid2 = us_covid2.loc['2020-01-21':'2020-07-31']
us_covid2 = us_covid2.reset_index()
us_covid2 = us_covid2.groupby('Date', as_index = False)['Cases'].sum()
us_covid2['Case_GR'] = us_covid2['Cases'].pct_change()
us_covid2 = us_covid2.iloc[1:]
#us_covid2
# Join case growth, FAANG growth, and USD/RMB growth on trading dates.
covid_faang = us_covid2.merge(faangs1, on = 'Date', how = 'left')
covid_faang = covid_faang.dropna()
covid_faang = usd_rmb.merge(covid_faang, on = 'Date', how = 'left')
covid_faang = covid_faang.drop(columns = ['Total','Cases','DEXCHUS'])
covid_faang = covid_faang.dropna()
covid_faang = covid_faang.drop(2)  # drop the row with index label 2
#covid_faang
# OLS fits: FAANG growth vs. case growth, and vs. USD/RMB growth.
reg = linreg().fit(X = covid_faang[['Case_GR']], y = covid_faang['Faang_GR'])
covid_faang['yhat1'] = reg.predict(covid_faang[['Case_GR']])
reg2 = linreg().fit(X = covid_faang[['RMB_GR']], y = covid_faang['Faang_GR'])
# BUG FIX: yhat2 was originally predicted with `reg` (the Case_GR model);
# use reg2 so the plotted line actually reflects the RMB_GR fit.
covid_faang['yhat2'] = reg2.predict(covid_faang[['RMB_GR']])
# Two regression scatter plots: FAANG growth vs. COVID case growth (left)
# and FAANG growth vs. USD/RMB growth (right), each with its fitted line.
plt.style.use('dark_background')
fig, ax = plt.subplots(nrows = 1, ncols = 2, figsize = (14,6))
#COVID, FAANG
covid_faang.plot.scatter(ax = ax[0], x = 'Case_GR',y='Faang_GR')
# Fitted values sorted by x so the regression line draws left-to-right.
covid_faang.sort_values('Case_GR').set_index('Case_GR')['yhat1'].plot(ax = ax[0], color = 'gold', lw = 4)
# Re-render current tick values as percentages.
vals = ax[0].get_yticks()
ax[0].set_yticklabels(['{:,.2%}'.format(x) for x in vals])
valsx = ax[0].get_xticks()
ax[0].set_xticklabels(['{:,.2%}'.format(y) for y in valsx])
ax[0].set_title('U.S. COVID Case GR v. FAANG GR', fontsize = 18, fontweight = 'bold')
ax[0].set_ylabel('FAANG Stock Growth Rate', fontsize = 14)
ax[0].set_xlabel('U.S. COVID Case Growth Rate', fontsize = 14)
#RMB, FAANG
covid_faang.plot.scatter(ax = ax[1], x = 'RMB_GR', y = 'Faang_GR')
covid_faang.sort_values('RMB_GR').set_index('RMB_GR')['yhat2'].plot(ax = ax[1], color = 'gold',lw = 4)
vals1 = ax[1].get_yticks()
ax[1].set_yticklabels(['{:,.2%}'.format(x) for x in vals1])
valsx1 = ax[1].get_xticks()
ax[1].set_xticklabels(['{:,.2%}'.format(y) for y in valsx1])
ax[1].set_title('USD/RMB ER v. FAANG GR', fontsize = 18, fontweight = 'bold')
# BUG FIX: this panel plots x='RMB_GR', y='Faang_GR', but the original labels
# called y the USD/RMB rate and x the COVID case growth rate; relabel to
# match the plotted columns.
ax[1].set_ylabel('FAANG Stock Growth Rate', fontsize = 14)
ax[1].set_xlabel('USD v. RMB Growth Rate', fontsize = 14)
# R^2 of each fit (notebook output).
reg.score(X = covid_faang[['Case_GR']], y = covid_faang['Faang_GR'])
reg2.score(X = covid_faang[['RMB_GR']], y = covid_faang['Faang_GR'])
# Keep only the three growth-rate columns and show their correlation matrix.
covid_faang = covid_faang.set_index('Date')
covid_faang = covid_faang.iloc[:,0:3]
covid_faang
## Correlation matrix
# (Removed an unused 10x10 random DataFrame that was never referenced.)
corr = covid_faang.corr()
corr.style.background_gradient(cmap='coolwarm')
The graphs depict the relationships between USD v. RMB and FAANG stock growth rates with U.S. COVID case percent change from 1/21 - 7/31. It is visible that increases in COVID growth rates caused a decrease in FAANG growth rate, while a slight increase in USD v. RMB growth rate. While it is expected that increased COVID cases is correlated with worse stock performance, it is surprising that increases in COVID growth had a relationship with USD appreciation.
The analysis shows that US COVID Case GR and FAANG GR are negatively correlated. However, there is a slight positive correlation between the USD v. RMB GR and FAANG GR.
# State-level daily death counts for the seaborn summary plots.
df = pd.read_csv('daily_death_us.csv')
df['datetime'] = pd.to_datetime(df.datetime)
df.head()
# Explicit .copy() so adding 'month' below doesn't raise
# SettingWithCopyWarning on a slice of df.
df_death_daily = df[['datetime', 'location_name', 'daily_death']].copy()
df_death_daily['month'] = df_death_daily['datetime'].dt.month
# Monthly death totals per state.
df_death_daily1 = pd.DataFrame(df_death_daily.groupby(['month', 'location_name'])['daily_death'].sum())
df_death_daily1 = df_death_daily1.reset_index('month')
df_death_daily1 = df_death_daily1.reset_index('location_name')
#df_death_daily1
# Distribution of state-month death totals, by month.
sns.set(style="ticks", context="talk")
sns.set(rc={'figure.figsize':(11.7,8.27)})
plt.style.use("dark_background")
sns.violinplot(x="month", y="daily_death", data=df_death_daily1)
plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
# Typo fix in the displayed title: 'deaily' -> 'daily'.
plt.title('Violin Plot for the daily death cases for different states in US',fontsize=20)
# Overall daily death trend (mean across states with confidence band).
plt.style.use("dark_background")
#sns.set(style="ticks", context="talk")
sns.lineplot(data=df_death_daily, x="datetime", y="daily_death")
sns.set(rc={'figure.figsize':(11.7,8.27)})
plt.title('State level daily death cases in US - General form',fontsize=20)
# Same series broken out per state.
sns.set(rc={'figure.figsize':(11.7,8.27)})
sns.set(style="ticks", context="talk")
plt.style.use("dark_background")
sns.lineplot(data=df_death_daily, x="datetime", y="daily_death",hue='location_name')
plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
plt.title('Daily death cases for different states',fontsize=20)
# Per-state daily death totals derived from the county data, indexed by date.
state_level = pd.DataFrame(us_counties.groupby(['date','state'])['deaths'].sum())
state_level = state_level.reset_index('state')
state_level = state_level.reset_index('date')
state_level = state_level.sort_values(by=['date','state'])
state_level = state_level.set_index('date')
#state_level
state = pd.Series(state_level['state'].unique())  # distinct jurisdiction names
# Commented-out exploratory per-state line plots kept from earlier drafts.
#plt.style.available
#plt.style.use('dark_background')
#fig,ax = plt.subplots()
#for i in state:
# state_level.loc[state_level['state'] == i,:]['deaths'].plot(ax =ax,figsize = (20,10),label=i)
# ax.legend(loc='best')
#ax.legend()
#state_level['deaths'].mean().plot(ax =ax,figsize = (10,5))
#state_level['death'].plot(ax =ax,figsize = (10,5))
#plt.plot(state_level['date'],state_level['deaths'],label=state_level['state'])
#plot(state_level['death'], label=state_level['state'])
#state_level.iloc[:,1]
# Wide table: one deaths column per state, dates as rows; dropna() keeps only
# dates where every state has a value.
state_level_pivot = pd.pivot_table(state_level,index=['date'],columns=['state'],
values=['deaths'],fill_value=np.nan)
state_level_pivot = state_level_pivot.dropna()
#state_level_pivot
# Append a 7-day rolling mean per state as columns death_0..death_54.
# NOTE(review): assumes exactly 55 state/territory columns in positional
# order — confirm against state_level_pivot.shape[1].
for i in range(0,55):
    state_level_pivot['death_'+str(i)] = state_level_pivot.iloc[:,i].rolling(window=7).mean()
#state_level_pivot
import datetime
# --- Calendar feature engineering on the state-level death series -----------
df2_2 = state_level.reset_index('date')
# BUG FIX: the original reformatted dates to '%d.%m.%Y' strings and re-parsed
# them with pd.DatetimeIndex, which parses month-first and silently swaps
# day and month whenever the day is <= 12. Derive every calendar feature
# directly from the parsed datetimes instead.
df2_2['Date'] = pd.to_datetime(df2_2['date'])
_dt = df2_2['Date'].dt
df2_2['month'] = _dt.month
df2_2['day'] = _dt.day
df2_2['dayofyear'] = _dt.dayofyear
# isocalendar().week replaces the deprecated .weekofyear accessor.
df2_2['weekofyear'] = _dt.isocalendar().week.astype(int)
df2_2['weekday'] = _dt.weekday
df2_2['quarter'] = _dt.quarter
df2_2['is_month_start'] = _dt.is_month_start
df2_2['is_month_end'] = _dt.is_month_end
df2_2 = df2_2.drop(['Date'], axis = 1)
df2_2 = df2_2.drop(['date'], axis = 1)
# One-hot encode the categorical calendar fields and the state name.
df2_2= pd.get_dummies(df2_2, columns=['month'], drop_first=True, prefix='month')
df2_2 = pd.get_dummies(df2_2, columns=['weekday'], drop_first=True, prefix='wday')
df2_2 = pd.get_dummies(df2_2, columns=['quarter'], drop_first=True, prefix='qrtr')
df2_2= pd.get_dummies(df2_2, columns=['is_month_start'], drop_first=True, prefix='m_start')
df2_2 = pd.get_dummies(df2_2, columns=['is_month_end'], drop_first=True, prefix='m_end')
df2_2= pd.get_dummies(df2_2, columns=['state'], drop_first=True, prefix='state')
#df2_2
from sklearn.model_selection import train_test_split
X = df2_2.drop(columns=["deaths"]).values
y = df2_2.deaths.values
# Chronological (shuffle=False) split: 50% train, 25% validation, 25% test.
X_train, X_holdout, y_train, y_holdout = train_test_split(X, y, shuffle=False, test_size=0.5, random_state = 0)
X_val, X_test, y_val, y_test = train_test_split(X_holdout, y_holdout, shuffle=False, test_size=0.5, random_state = 0)
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error
# Baseline linear model predicting daily deaths from the calendar features.
model_1 = LinearRegression()
model_1.fit(X_train,y_train)
y_predicted = model_1.predict(X_val)
MAE_sklearn = mean_absolute_error(y_val, y_predicted)       # validation MAE
yy = model_1.predict(X_train)
MAE_sklearn_train = mean_absolute_error(y_train, yy)        # train MAE (overfit check)
MAE_sklearn
us_level = pd.DataFrame(state_level.groupby('date')['deaths'].sum())
#us_level
# Reload Netflix daily prices, keep only 2020 rows, and index by trading date.
netflix = pd.read_csv(path_netflix).tail(365)
netflix['Date'] = pd.to_datetime(netflix['Date'])
netflix = netflix[netflix['Date'].dt.year == 2020].set_index('Date')
#netflix
# Column 3 after indexing is the daily close.
netflix_sub = netflix.iloc[:, 3:4]
#netflix_sub
# Reload Amazon daily prices, keep only 2020 rows, and index by trading date.
amazon = pd.read_csv(path_amazon).tail(365)
amazon['Date'] = pd.to_datetime(amazon['Date'])
amazon = amazon[amazon['Date'].dt.year == 2020].set_index('Date')
# Column 3 after indexing is the daily close.
amazon_sub = amazon.iloc[:, 3:4]
#amazon_sub
# Date back as an ordinary column for merging/plotting later.
amazon_sub1 = amazon_sub.reset_index()
#amazon_sub1
# Reload Google daily prices, keep only 2020 rows, and index by trading date.
google = pd.read_csv(path_google).tail(365)
google['Date'] = pd.to_datetime(google['Date'])
google = google[google['Date'].dt.year == 2020].set_index('Date')
#google
# Column 3 after indexing is the daily close.
google_sub = google.iloc[:, 3:4]
#google_sub
# Date back as an ordinary column for merging/plotting later.
google_sub1 = google_sub.reset_index()
#google_sub1
# Reload Apple daily prices, keep only 2020 rows, and index by trading date.
apple = pd.read_csv(path_apple).tail(365)
apple['Date'] = pd.to_datetime(apple['Date'])
apple = apple[apple['Date'].dt.year == 2020].set_index('Date')
#apple
# Column 3 after indexing is the daily close.
apple_sub = apple.iloc[:, 3:4]
# Date back as an ordinary column for merging/plotting later.
apple_sub1 = apple_sub.reset_index()
#apple_sub1
# Reload Facebook daily prices, keep only 2020 rows, and index by trading date.
# (Removed the no-op `facebook = facebook` self-assignment.)
facebook = pd.read_csv(path_facebook).tail(365)
facebook['Date'] = pd.to_datetime(facebook['Date'])
facebook = facebook[facebook['Date'].dt.year == 2020].set_index('Date')
#facebook
# Column 3 after indexing is the daily close.
facebook_sub = facebook.iloc[:, 3:4]
# Date back as an ordinary column for merging/plotting later.
facebook_sub1 = facebook_sub.reset_index()
#facebook_sub1
# Plot the national daily death series together with three smoothed versions
# (5-day simple moving average, expanding/cumulative mean, and a
# 40-day-span exponential moving average) to expose the underlying trend.
fig, ax1 = plt.subplots()
fig.suptitle('Deaths Cases for US by Date & Trends with the FAANG Stocks Prices by Date', fontsize=25)
us_level['deaths'].plot(ax=ax1, figsize=(20, 10), lw=3.5, alpha=0.5)

raw = us_level.iloc[:, 0]
us_level['SMA_5'] = raw.rolling(window=5).mean()
us_level['CMA_5'] = raw.expanding(min_periods=5).mean()
us_level['EMA'] = raw.ewm(span=40, adjust=False).mean()

for smoothed in ('SMA_5', 'CMA_5', 'EMA'):
    us_level[smoothed].plot(ax=ax1, alpha=0.5, lw=3.5)

ax1.legend(fontsize=25)
plt.savefig('COVID19.png')
#us_level
# Restore Date as a column for the stationarity plots, and set up a
# shared-x five-panel figure (one panel per FAANG stock).
netflix_sub1 = netflix_sub.reset_index()
fig, ax = plt.subplots(5, figsize=(20, 25), sharex=True)
from statsmodels.tsa.stattools import adfuller  # NOTE(review): imported for ADF stationarity tests, but unused in the visible code
def test_stationarity(timeseries,i,name):
    """Plot a stock's close price with its 12-day rolling mean/std on ax[i].

    Mutates `timeseries` in place by adding 'rolmean' and 'rolstd' columns,
    and relies on the module-level `ax` array of subplot axes created above.

    timeseries : DataFrame with 'Date' in column 0 and 'Close' in column 1
                 (the *_sub1 frames built from reset_index on the close-only
                 frames — confirm for any other caller).
    i          : subplot row index into `ax`.
    name       : stock name, title-cased into the legend label.
    """
    #Determing rolling statistics (column 1 is the close price)
    timeseries['rolmean'] = timeseries.iloc[:,1:2].rolling(window=12).mean()
    timeseries['rolstd']= timeseries.iloc[:,1:2].rolling(window=12).std()
    #Plot rolling statistics:
    plt.style.use('dark_background')
    timeseries.plot(x='Date',y='Close',color='blue',label='Original'+' - '+str(name).title(),ax=ax[i])
    timeseries.plot(x='Date',y='rolmean',color='red', label='12 Day Rolling Mean',ax=ax[i])
    timeseries.plot(x='Date',y='rolstd',color='orange', label = '12 Day Rolling Std',ax=ax[i])
    ax[i].legend(loc='best')
    #ax[i].show(block=False)
    #ax[0].title('Rolling Mean & Standard Deviation - Netflix')
# Overlay each stock's 2020 close with its 12-day rolling mean/std, one
# panel per stock, then save the combined figure.
fig.suptitle('Change of FAANG Stock Price by Date', fontsize=30)
for panel, (frame, label) in enumerate([(netflix_sub1, 'netflix'),
                                        (google_sub1, 'google'),
                                        (apple_sub1, 'apple'),
                                        (amazon_sub1, 'amazon'),
                                        (facebook_sub1, 'facebook')]):
    test_stationarity(frame, panel, label)
plt.savefig('FAANG1.png')
From the graph of US-level daily death cases, we can see that deaths basically held at a level of about 20 people per day from June until November. As for the relationship between the FAANG closing prices and US-level deaths, the stock prices of all five companies stayed relatively stable. However, all of the prices dropped around mid-March, which is when the COVID-19 outbreak hit the US. It is also interesting to consider why the stock prices rose even as the death toll increased significantly after July.
# Suffix each stock's Close / rolling-stat columns so the five frames can
# be merged without name collisions.
def _tag(frame, close_tag, roll_tag):
    # One-line helper: rename the three shared columns with stock suffixes.
    return frame.rename(columns={'Close': f'Close_{close_tag}',
                                 'rolmean': f'rolmean_{roll_tag}',
                                 'rolstd': f'rolstd_{roll_tag}'})

netflix_sub1 = _tag(netflix_sub1, 'Netflix', 'Netflix')
amazon_sub1 = _tag(amazon_sub1, 'Amazon', 'Amazon')
apple_sub1 = _tag(apple_sub1, 'Apple', 'Apple')
google_sub1 = _tag(google_sub1, 'Google', 'Google')
# Facebook keeps 'Close_Facebook' but the shorter 'FB' rolling-stat names.
facebook_sub1 = _tag(facebook_sub1, 'Facebook', 'FB')
#facebook_sub1
# Merge the per-stock frames on Date and min-max scale every column to
# [0, 1] so the five stocks can be compared on a single axis.
stonks = apple_sub1.merge(google_sub1, on='Date', how='left')
stonks = stonks.merge(amazon_sub1, on='Date', how='left')
stonks = stonks.merge(netflix_sub1, on='Date', how='left')
stonks = stonks.merge(facebook_sub1, on='Date', how='left')
stonks = stonks.set_index('Date')

from sklearn import preprocessing
min_max_scaler = preprocessing.MinMaxScaler()
x_scaled = min_max_scaler.fit_transform(stonks.values)
# FIX: the original rebuilt the frame with a default RangeIndex and renamed
# integer columns by hand, silently discarding the Date index (an unused
# `pd.DataFrame(index=stonks.index)` line showed the intent). Carrying over
# index and columns from `stonks` keeps the dates and the same column names.
normalized_stonks = pd.DataFrame(x_scaled, index=stonks.index, columns=stonks.columns)
normalized_stonks = normalized_stonks.dropna()
#normalized_stonks
# Fit four univariate OLS models predicting Google's normalized close from
# each of the other FAANG closes; store fitted values for the plots below.
_fits = {}
for k, predictor in enumerate(['Close_Amazon', 'Close_Apple',
                               'Close_Netflix', 'Close_Facebook'], start=1):
    fit = smf.ols(f'Close_Google ~ {predictor}', normalized_stonks).fit()
    _fits[k] = fit
    normalized_stonks[f'yhat{k}'] = fit.predict()
reg1, reg2, reg3, reg4 = _fits[1], _fits[2], _fits[3], _fits[4]
# 2x2 grid: scatter of each stock's close vs Google's, with the fitted OLS
# line overlaid. FIXES: the original's "#bottom left"/"#bottom right"
# comments were swapped relative to the axes actually used, and the Netflix
# panel passed color as a one-element list; folding the four near-identical
# panels into one loop removes both inconsistencies.
fig, ax = plt.subplots(nrows=2, ncols=2, figsize=(16, 14))
panels = [
    ((0, 0), 'Close_Amazon', 'yhat1', 'Amazon'),     # top left
    ((0, 1), 'Close_Apple', 'yhat2', 'Apple'),       # top right
    ((1, 0), 'Close_Netflix', 'yhat3', 'Netflix'),   # bottom left
    ((1, 1), 'Close_Facebook', 'yhat4', 'Facebook'), # bottom right
]
for (r, c), xcol, yhat, label in panels:
    normalized_stonks.plot.scatter(ax=ax[r, c], x=xcol, y='Close_Google')
    # Sort by the predictor so the regression line plots left-to-right.
    normalized_stonks.sort_values(xcol).set_index(xcol)[yhat].plot(
        ax=ax[r, c], color='orchid', lw=4)
    ax[r, c].set_xlabel(f'{label} Daily Close', fontsize=14)
    ax[r, c].set_ylabel('Google Daily Close', fontsize=14)
    ax[r, c].set_title(f'{label} & Google', fontsize=16, fontweight='bold')
print(reg4.summary(), reg2.summary(), reg1.summary(), reg3.summary())
The charts above use a normalized end-of-day price for the FAANG stocks. The analysis reveals that when predicting the stock price of Google, the end-of-day price for Facebook is the best predictor with an R^2 of .848. The remaining order is Apple (R^2: .762), Amazon (R^2: .361), and Netflix (R^2: .301). Thus, it is likely that the stock prices for Facebook and Google experienced the most similar percent change throughout the time period.
Each regression also has a low p-value, indicating that the observed relationships are unlikely to have occurred by chance.